# -*- coding: utf-8 -*-
import sys
import jieba
import re

class UserData:
    """Hold a user's free-text profile fields and derive a keyword-based score.

    All six constructor arguments are stored unchanged on the instance.
    Only ``grjn``, ``hjjfbzpqk`` and ``zykyhdhxshd`` are read by
    :meth:`process_data`; the remaining fields are kept but not scored.

    NOTE(review): the field names look like pinyin initialisms of form
    labels (e.g. ``grjn`` presumably "personal skills") -- confirm against
    the caller that builds these values.
    """

    def __init__(self, ahtc, grjn, shsjjljrzqk, hjjfbzpqk, zykyhdhxshd, grzjhqzyj):
        self.ahtc = ahtc
        self.grjn = grjn
        self.shsjjljrzqk = shsjjljrzqk
        self.hjjfbzpqk = hjjfbzpqk
        self.zykyhdhxshd = zykyhdhxshd
        self.grzjhqzyj = grzjhqzyj

    def process_data(self):
        """Return the mean of three per-field keyword scores, each in [0, 5].

        Each scored field is matched against its own keyword list via
        ``score_text``. The raw match count is capped at the size of the
        keyword list, divided by that size and scaled to 0-5. The result is
        the arithmetic mean of the three scaled scores.

        Returns:
            float: average score in the range 0 to 5.
        """
        keywords1 = ['数据分析', '视频剪辑', '摄影', '编程', '电脑', '绘画', '计算机', '二级', '证书', '英语六级',
                     '资格证', '四级', '证券', '普通话', '四六级', '驾驶证', '资格证书', '六级', '职业', '甲等']
        keywords2 = ['奖学金', '校级', '优秀学生', '院级', '励志', '年度', '发表', '大赛', '证书', '计算机']
        keywords3 = ['研究', '项目', '课题', '论文', '报告', '立项', '发展', '学术', '撰写', '大赛', '论坛']

        # Each scored text field is paired with the keyword list it is
        # matched against.
        scored_fields = (
            (self.grjn, keywords1),
            (self.hjjfbzpqk, keywords2),
            (self.zykyhdhxshd, keywords3),
        )

        normalized_scores = []
        for text, keywords in scored_fields:
            # The size of the keyword list is treated as the maximum score.
            max_score = len(keywords)
            raw_score = score_text(text, keywords)
            # score_text counts occurrences, so a repeated keyword can push
            # the raw count past max_score; cap it so the scaled value never
            # exceeds 5.
            capped_score = min(raw_score, max_score)
            normalized_scores.append(
                (capped_score / max_score) * 5 if max_score > 0 else 0
            )

        return sum(normalized_scores) / len(normalized_scores)


# 文本预处理函数
def prepare_text(text):
    """Segment ``text`` with jieba and return a cleaned, space-joined string.

    Tokens of length 1 or less and tokens found in the local stopword list
    are discarded. The surviving tokens are joined with single spaces, and
    from that string only runs of characters in the range U+4E00-U+9FA5 are
    kept, again joined with single spaces.
    """
    excluded = ['年月日']

    kept_tokens = []
    for token in jieba.lcut(text):
        # Skip single-character (or empty) tokens.
        if len(token) <= 1:
            continue
        if token in excluded:
            continue
        kept_tokens.append(token)

    joined = ' '.join(kept_tokens)
    # Keep only the character runs matched by the range in the pattern.
    chinese_runs = re.findall('[\u4e00-\u9fa5]+', joined)
    return ' '.join(chinese_runs)


# 关键词匹配得分函数
def score_text(text, keywords):
    """Count how often the given keywords occur as tokens in ``text``.

    The text is cleaned with ``prepare_text`` and then segmented again with
    ``jieba.lcut``. The score is the sum, over every entry in ``keywords``,
    of the number of tokens exactly equal to that entry.

    Returns:
        int: total number of keyword occurrences (0 if none match).
    """
    tokens = jieba.lcut(prepare_text(text))
    # Each keyword contributes its own occurrence count to the total.
    return sum(tokens.count(keyword) for keyword in keywords)


# 运行主函数
if __name__ == "__main__":
    # The three scored text fields are passed as positional command-line
    # arguments. Fail with a usage message instead of an IndexError
    # traceback when any of them is missing.
    if len(sys.argv) < 4:
        sys.exit(f"usage: {sys.argv[0]} <grjn> <hjjfbzpqk> <zykyhdhxshd>")
    grjn, hjjfbzpqk, zykyhdhxshd = sys.argv[1:4]

    # The fields that are not scored receive placeholder strings.
    user_data = UserData('ahtc', grjn, 'shsjjljrzqk', hjjfbzpqk, zykyhdhxshd, 'grzjhqzyj')
    result = user_data.process_data()

    # Print the averaged score with two decimal places.
    print(f"{result:.2f}")